//@+leo-ver=5-thin
//@+node:caminhante.20200309141158.1: * @file ./utf8.h
//@@tabwidth -2
#ifndef _UTF8_H_
#define _UTF8_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <unistd.h>
//@+others
//@+node:caminhante.20231212192422.1: ** struct uchar
// A Unicode string is an array of `struct uchar` objects, terminated by a `struct uchar` with `.bytes == 0`.
// A `\0` byte isn't considered a valid Unicode char.
struct uchar {
  // Number of bytes used by this character's UTF-8 byte sequence
  // (1 to 4 for a valid character); 0 marks the end of a Unicode string.
  uint8_t bytes;
  // Anonymous union: both members below occupy the same 4 bytes of storage.
  union {
    // The byte sequence, addressable one byte at a time.
    char chars[4];
    // The same 4 bytes viewed as a single 32-bit integer; its numeric value
    // therefore depends on the byte order of the host.
    uint32_t ichars;
  };
};
//@+node:caminhante.20200309141158.2: ** uchar_valid
// [ valid UTF8 byte sequence => true | false ]
// Tests whether `source` points at a valid UTF-8 byte sequence:
// returns true if it does, false otherwise.
bool uchar_valid (char* source);
//@+node:caminhante.20200309141158.3: ** uchar_bytes
// [ valid UTF8 byte sequence =>
  // number of bytes occupied by a valid UTF8 byte sequence, between 1 and 4 | 0 ]
// Returns 0 when `source` does not point at a valid UTF-8 byte sequence.
size_t uchar_bytes (char* source);
//@+node:caminhante.20200309141158.4: ** ustring_length
// [ sequence of valid UTF8 byte sequences =>
  // number of valid consecutive UTF8 byte sequences, greater than or equal to 1 | 0 ]
// The result counts characters (byte sequences), not bytes;
// `ustring_bytes` reports the byte count instead.
size_t ustring_length (char* source);
//@+node:caminhante.20200309141158.5: ** ustring_bytes
// [ sequence of valid UTF8 byte sequences =>
  // the number of bytes occupied by valid consecutive UTF8 byte sequences,
  // greater than or equal to 1 | 0 ]
// The result counts bytes, not characters;
// `ustring_length` reports the character count instead.
size_t ustring_bytes (char* source);
//@+node:caminhante.20200309141158.6: ** cstring_bytes
// [ sequence of `struct uchar` UTF8 byte sequences =>
  // number of bytes required to convert it to a conventional `\0`-terminated `char` array ]
// `source` is a Unicode string as described at `struct uchar`
// (an array ending with an element whose `.bytes == 0`).
// NOTE(review): presumably the result already includes the terminating `\0` byte —
// confirm against the implementation before sizing a buffer with it.
size_t cstring_bytes (struct uchar* source);
//@+node:caminhante.20200309141158.7: ** next_uchar
// [ valid UTF8 byte sequence =>
  // a correctly initialized `struct uchar` object |
  // a `struct uchar` object with `.bytes == 0` ]
// Reads one character starting at `source` and returns it by value.
// A result with `.bytes == 0` means `source` did not point at a valid UTF-8 byte sequence.
struct uchar next_uchar (char* source);
//@+node:caminhante.20200309141158.8: ** c_to_ustring
// [ a `char` array containing potentially valid UTF8 text =>
  // a `struct uchar` array with all consecutive valid UTF8 byte sequences is written at `*destination` ]
// The caller provides the `destination` array and must compute the required
  // `struct uchar` array length beforehand, with `ustring_length(source)`.
// NOTE(review): if a terminating element (`.bytes == 0`) is also written, the array
// needs one slot more than `ustring_length(source)` — confirm against the implementation.
void c_to_ustring (char* source, struct uchar* destination);
//@+node:caminhante.20200309141158.9: ** u_to_cstring
// [ a `struct uchar` array containing potentially valid UTF8 text =>
  // a `\0`-terminated `char` array is written at `*destination` ]
// The caller provides the `destination` array and must compute the required `char`
  // array length beforehand: the sum of all `struct uchar` `.bytes` members, plus 1
  // (accounting for an extra `\0` byte at the end).
void u_to_cstring (struct uchar* source, char* destination);
//@+node:caminhante.20200309141158.10: ** uchar_puts
// [ a `struct uchar` object =>
  // side effect: outputs its UTF8 byte sequence at the file descriptor, returns the number of written bytes ]
// `fileno` is the destination file descriptor; `uc` points at the character to output.
// NOTE(review): the parameter name `fileno` is also the name of the POSIX function
// `fileno()` from <stdio.h>; legal in a prototype, but easy to misread.
// NOTE(review): the return type is unsigned, so how a failed write is reported
// cannot be determined from this header — confirm against the implementation.
size_t uchar_puts (int fileno, struct uchar* uc);
//@+node:caminhante.20200309141158.11: ** ustring_puts
// [ sequence of `struct uchar` objects =>
  // side effect: outputs all UTF8 byte sequences at the file descriptor, returns the number of written bytes ]
// `fileno` is the destination file descriptor; `ustring` is a Unicode string as described
// at `struct uchar`, so output presumably stops at the element with `.bytes == 0`.
// NOTE(review): the parameter name `fileno` is also the name of the POSIX function
// `fileno()` from <stdio.h>; legal in a prototype, but easy to misread.
// NOTE(review): the return type is unsigned, so how a failed write is reported
// cannot be determined from this header — confirm against the implementation.
size_t ustring_puts (int fileno, struct uchar* ustring);
//@-others
#endif
//@-leo
